# title:   Incentives for Non-participation: Absence in the United Kingdom House of Commons, 1997-2015
# content: script to reproduce results reported in the main paper, part 1
#          Tables 1-2, and Figures 1-3 ("between MP" analysis)
# author:  Zoltan Fazekas
# note:    models take a lot of time to run
#          session info included in a separate document
#          assumes data file to be located in the same folder as script

# for RStudio warnings
# !diagnostics off

library("lme4")
library("tidyverse")
library("gridExtra")
library("texreg")
library("ggrepel")
library("merTools")
load("absence_long.Rdata")

names(vm_m) ## mm stands form min_max rescaling, carried out within year

# Figure 1 (main text)
mp <- vm_m %>% group_by(mp_name_code_sep) %>%
  mutate(gvt_u = length(unique(gvt))) %>%
  filter(gvt_u == 1) %>%
  summarize(a   = mean(absence),
            gvt = unique(gvt))
# round(mean(mp$a[mp$gvt == 0]), 2)

p1 <- ggplot(mp, aes(x = a, group = gvt,
                     fill = factor(gvt))) +
  geom_density(alpha = 0.75, colour = "grey20") +
  theme_minimal() +
  scale_x_continuous("Absence proportion") +
  scale_y_continuous("Density") +
  scale_fill_manual("", values = c("grey20", "grey60"),
                    labels = c(paste0("Opposition: ", round(mean(mp$a[mp$gvt == 0]), 2)), 
                               paste0("Government: ", round(mean(mp$a[mp$gvt == 1]), 2)))) +
  theme(text = element_text(family = "serif"), 
        panel.background = element_rect(fill = "grey98", colour = NA),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        axis.title.y = element_text(size = 12),
        axis.title.x = element_text(size = 14),
        plot.title = element_text(size = 14),
        legend.position = c(0.8, 0.9)) +
  labs(title = "a) Distribution of absence for government and opposition") +
  theme(title = element_text(face = "bold"))

# Time
labs <- vm_m %>% group_by(elect_minmax) %>% 
  summarize(absence = mean(absence)) %>% filter(elect_minmax < 0.1 | elect_minmax > 0.9)

time <- vm_m %>% group_by(elect_minmax) %>% 
  summarize(absence = mean(absence)) 

# round(cor(time$elect_minmax, time$absence), 2)

p2 <- ggplot(time, aes(x = elect_minmax, y = absence)) +
  geom_point(alpha = 0.25) +
  stat_smooth(se = FALSE, method = "loess", colour = "black", size = 1.25) + 
  scale_y_continuous("Average aabsence") +
  scale_x_continuous("", breaks = c(0, 0.25, 0.5, 0.75, 1),
                     labels = c("Beginning\nof term", "", 
                                "Middle", "", "End\nof term")) +
  theme_minimal() +
  theme(text = element_text(family = "serif"), 
        panel.background = element_rect(fill = "grey98", colour = NA),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        axis.title.y = element_text(size = 12),
        axis.title.x = element_text(size = 14),
        plot.title = element_text(size = 14)) +
  labs(title = "b) Absence throughout the legislative term") +
  theme(title = element_text(face = "bold")) +
  annotate(geom = "text", x = 0.1, y = 1, family = "serif", fontface = 2,
           label = paste0("Correlation: ", round(cor(time$elect_minmax, time$absence), 2)))

# combine
grid.arrange(p1, p2, ncol = 2)
# note: change width + length for export/zoom (valid for all figures)


# Fit models for Table 1 (main text)
# (check for convergence + generate exportable output table)
# (in text customization not included)

# baseline
fit_reml_00_pc <- glmer(absence ~  1 + (1 | mp_name_code_sep),
                        data    = vm_m,
                        family  = binomial(link = "logit"),
                        control = glmerControl(optimizer = "bobyqa",
                                               optCtrl   = list(maxfun = 2e6)))
# summary(fit_reml_00_pc)
# core predictors
fit_reml_01_pc <- glmer(absence ~  1 + elect_minmax + gvt + (1 | mp_name_code_sep),
                        data    = vm_m,
                        family  = binomial(link = "logit"),
                        control = glmerControl(optimizer = "bobyqa",
                                               optCtrl   = list(maxfun = 2e6)))
# core predictors with varying slope for election prox
fit_reml_02_pc <- glmer(absence ~  1 + elect_minmax + gvt + 
                          (1 + elect_minmax | mp_name_code_sep),
                        data    = vm_m,
                        family  = binomial(link = "logit"),
                        control = glmerControl(optimizer = "bobyqa",
                                               optCtrl   = list(maxfun = 2e6)))
# see convergence check below
# core with varying slope + gvt predictor of slope (cross-level interaction)
fit_reml_03_pc <- glmer(absence ~  1 + elect_minmax * gvt + (1 + elect_minmax | mp_name_code_sep),
                        data    = vm_m,
                        family  = binomial(link = "logit"),
                        control = glmerControl(optimizer = "bobyqa",
                                               optCtrl   = list(maxfun = 2e6)))
# see convergence check below
# idem with year
fit_reml_04_pc <- glmer(absence ~  1 + elect_minmax * gvt + factor(year_code) +
                          (1 + elect_minmax | mp_name_code_sep),
                        data    = vm_m,
                        family  = binomial(link = "logit"),
                        control = glmerControl(optimizer = "bobyqa",
                                               optCtrl   = list(maxfun = 2e6)))
# see convergence check below
# idem with all controls
fit_reml_05_pc <- glmer(absence ~  1 + elect_minmax * gvt + factor(year_code) +
                          gvt_bill +
                          factor(week_day_no) +
                          gvt + ret + const_maj_mm +
                          dist_west_mm + sen_elapsed_mm +
                          (1 + elect_minmax | mp_name_code_sep),
                        data    = vm_m,
                        family  = binomial(link = "logit"),
                        control = glmerControl(optimizer = "bobyqa",
                                               optCtrl   = list(maxfun = 2e6)))
# see convergence check below
# investigate convergence
# convergence checks: all good,  below threshold
derivs1 <- fit_reml_02_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

derivs1 <- fit_reml_03_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

derivs1 <- fit_reml_04_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

derivs1 <- fit_reml_05_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

# note: change to texreg if LaTeX is exported, for paper customization not included
# (same note for all tables)
screenreg(list(fit_reml_00_pc, fit_reml_01_pc, fit_reml_02_pc,
               fit_reml_03_pc, fit_reml_04_pc, fit_reml_05_pc),
          custom.model.names = c(
            "Baseline",
            "Core predictors",
            "Varying slope",
            "Interaction",
            "Legislature fixed effects",
            "All controls"))

# Figure 2, based on the fitted model
pdat <- expand.grid(elect_minmax = seq(0, 1, 0.025),
                    gvt_bill     = 1,
                    week_day_no  = 0,
                    gvt = c(0, 1),
                    ret = 0,
                    const_maj_mm = mean(vm_m$const_maj_mm),
                    dist_west_mm = mean(vm_m$dist_west_mm),
                    sen_elapsed_mm = mean(vm_m$sen_elapsed_mm),
                    year_code = 2, mp_name_code_sep = "NA")

# minor differences compared to published plot values appear depending on convergence/stopping point of
# the models/simulation component
preds <- predictInterval(fit_reml_05_pc, pdat, include.resid.var = FALSE,
                         level = 0.95, type = "probability")

preds <- cbind(pdat, preds)

plabs <- preds %>% filter(elect_minmax == 0 | elect_minmax == 1) %>% 
  mutate(labs = paste0(round(fit, 3),
                       " [",
                       round(lwr, 3),
                       ", ",
                       round(upr, 3),
                       "]"))

pres <- ggplot(preds, aes(x = elect_minmax, y = fit, ymin = lwr, ymax = upr,
                          group = factor(gvt), colour = factor(gvt))) +
  geom_ribbon(alpha = 0.5, fill = "grey70", colour = NA) +
  geom_line(size = 1.1) + 
  geom_point(data = plabs, inherit.aes = TRUE) +
  ggrepel::geom_text_repel(data = filter(plabs, elect_minmax == 0), 
                           aes(label = labs), size = 3,
                           inherit.aes = TRUE,
                           nudge_x = 0, family = "serif",
                           nudge_y = -0.025, segment.alpha = 0) +
  ggrepel::geom_text_repel(data = filter(plabs, elect_minmax == 1), 
                           aes(label = labs), size = 3,
                           inherit.aes = TRUE,
                           nudge_x = 0, family = "serif",
                           nudge_y = +0.025, segment.alpha = 0) +
  scale_y_continuous("", limits = c(0, 0.5)) +
  scale_x_continuous("", breaks = c(0, 0.5, 1),
                     labels = c("Beginning\nof term",
                                "Middle", "End\nof term")) +
  theme_minimal() +
  scale_colour_manual("", values = c("grey50", "black"), 
                      labels = c("Opposition", "Government"),
                      guide = FALSE) +
  theme(text = element_text(family = "serif"), 
        panel.background = element_rect(fill = "grey98", colour = NA),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle = 0, hjust = 1),
        axis.title.x = element_text(size = 10),
        plot.title = element_text(size = 10)) +
  labs(title = "Absence probability") +
  theme(title = element_text(face = "bold")) +
  annotate(geom = "text", x = 0.5, y = 0.35, family = "serif", colour = "grey50",
           label = "Opposition") +
  annotate(geom = "text", x = 0.5, y = 0.225, family = "serif", colour = "black",
           label = "Government")
pres

# Table 2 robustness checks
# retiring only
fit_rob_01_pc <- glmer(absence ~  1 + elect_minmax * gvt + factor(year_code) +
                         gvt_bill +
                         factor(week_day_no) +
                         gvt + const_maj_mm +
                         dist_west_mm + sen_elapsed_mm +
                         (1 + elect_minmax | mp_name_code_sep),
                       data    = dplyr::filter(vm_m, ret == 1),
                       family  = binomial(link = "logit"),
                       control = glmerControl(optimizer = "bobyqa",
                                              optCtrl   = list(maxfun = 2e6)))

# not-retiring only
fit_rob_02_pc <- glmer(absence ~  1 + elect_minmax * gvt + factor(year_code) +
                         gvt_bill +
                         factor(week_day_no) +
                         gvt + const_maj_mm +
                         dist_west_mm + sen_elapsed_mm +
                         (1 + elect_minmax | mp_name_code_sep),
                       data    = dplyr::filter(vm_m, ret == 0),
                       family  = binomial(link = "logit"),
                       control = glmerControl(optimizer = "bobyqa",
                                              optCtrl   = list(maxfun = 2e6)))
# three main parties only
fit_rob_03_pc <- glmer(absence ~  1 + elect_minmax * gvt + factor(year_code) +
                         gvt_bill +
                         factor(week_day_no) +
                         gvt + ret + const_maj_mm +
                         dist_west_mm + sen_elapsed_mm +
                         (1 + elect_minmax | mp_name_code_sep),
                       data    = dplyr::filter(vm_m, party != "Other"),
                       family  = binomial(link = "logit"),
                       control = glmerControl(optimizer = "bobyqa",
                                              optCtrl   = list(maxfun = 2e6)))

# MPs across legislatures kept as one group: removed from code/data as names have been removed
# from replication data

# convergence checks: all good,  below threshold
derivs1 <- fit_rob_01_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

derivs1 <- fit_rob_02_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

derivs1 <- fit_rob_03_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

screenreg(list(fit_rob_01_pc, fit_rob_02_pc, fit_rob_03_pc))

# Figure 3: separate models for each legislature
#           figure in main text, model results in appendix
fit_reml_1997_pc <- glmer(absence ~  1 + elect_minmax *  gvt + 
                            gvt_bill +
                            factor(week_day_no) +
                            gvt + ret + const_maj_mm +
                            dist_west_mm + sen_elapsed_mm +
                            (1 + elect_minmax | mp_name_code_sep),
                          data    = dplyr::filter(vm_m, year == 1997),
                          family  = binomial(link = "logit"),
                          control = glmerControl(optimizer = "bobyqa",
                                                 optCtrl   = list(maxfun = 2e6)))
fit_reml_2001_pc <- glmer(absence ~  1 + elect_minmax *  gvt + 
                            gvt_bill +
                            factor(week_day_no) +
                            gvt + ret + const_maj_mm +
                            dist_west_mm + sen_elapsed_mm +
                            (1 + elect_minmax | mp_name_code_sep),
                          data    = dplyr::filter(vm_m, year == 2001),
                          family  = binomial(link = "logit"),
                          control = glmerControl(optimizer = "bobyqa",
                                                 optCtrl   = list(maxfun = 2e6)))
fit_reml_2005_pc <- glmer(absence ~  1 + elect_minmax *  gvt + 
                            gvt_bill +
                            factor(week_day_no) +
                            gvt + ret + const_maj_mm +
                            dist_west_mm + sen_elapsed_mm +
                            (1 + elect_minmax | mp_name_code_sep),
                          data    = dplyr::filter(vm_m, year == 2005),
                          family  = binomial(link = "logit"),
                          control = glmerControl(optimizer = "bobyqa",
                                                 optCtrl   = list(maxfun = 2e6)))
fit_reml_2010_pc <- glmer(absence ~  1 + elect_minmax *  gvt + 
                            gvt_bill +
                            factor(week_day_no) +
                            gvt + ret + const_maj_mm +
                            dist_west_mm + sen_elapsed_mm +
                            (1 + elect_minmax | mp_name_code_sep),
                          data    = dplyr::filter(vm_m, year == 2010),
                          family  = binomial(link = "logit"),
                          control = glmerControl(optimizer = "bobyqa",
                                                 optCtrl   = list(maxfun = 2e6)))
# convergence checks: all good,  below threshold
derivs1 <- fit_reml_1997_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

derivs1 <- fit_reml_2001_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

derivs1 <- fit_reml_2005_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

derivs1 <- fit_reml_2010_pc@optinfo$derivs
sc_grad1 <- with(derivs1,
                 solve(Hessian, gradient))
max(abs(sc_grad1))
max(pmin(abs(sc_grad1),abs(derivs1$gradient)))

# results table (appendix)
screenreg(list(fit_reml_1997_pc, fit_reml_2001_pc, fit_reml_2005_pc, fit_reml_2010_pc))

# create figure (Figure 3)
p1997 <- expand.grid(elect_minmax = seq(0, 1, 0.025),
                     gvt_bill     = 1,
                     week_day_no  = 0,
                     gvt = c(0, 1),
                     ret = 0,
                     const_maj_mm = mean(vm_m$const_maj_mm[vm_m$year_code == 0]),
                     dist_west_mm = mean(vm_m$dist_west_mm[vm_m$year_code == 0]),
                     sen_elapsed_mm = mean(vm_m$sen_elapsed_mm[vm_m$year_code == 0]), 
                     mp_name_code_sep = "NA",
                     year_code = 0)
p2001 <- expand.grid(elect_minmax = seq(0, 1, 0.025),
                     gvt_bill     = 1,
                     week_day_no  = 0,
                     gvt = c(0, 1),
                     ret = 0,
                     const_maj_mm = mean(vm_m$const_maj_mm[vm_m$year_code == 1]),
                     dist_west_mm = mean(vm_m$dist_west_mm[vm_m$year_code == 1]),
                     sen_elapsed_mm = mean(vm_m$sen_elapsed_mm[vm_m$year_code == 1]), 
                     mp_name_code_sep = "NA",
                     year_code = 1)
p2005 <- expand.grid(elect_minmax = seq(0, 1, 0.025),
                     gvt_bill     = 1,
                     week_day_no  = 0,
                     gvt = c(0, 1),
                     ret = 0,
                     const_maj_mm = mean(vm_m$const_maj_mm[vm_m$year_code == 2]),
                     dist_west_mm = mean(vm_m$dist_west_mm[vm_m$year_code == 2]),
                     sen_elapsed_mm = mean(vm_m$sen_elapsed_mm[vm_m$year_code == 2]), 
                     mp_name_code_sep = "NA",
                     year_code = 2)
p2010 <- expand.grid(elect_minmax = seq(0, 1, 0.025),
                     gvt_bill     = 1,
                     week_day_no  = 0,
                     gvt = c(0, 1),
                     ret = 0,
                     const_maj_mm = mean(vm_m$const_maj_mm[vm_m$year_code == 3]),
                     dist_west_mm = mean(vm_m$dist_west_mm[vm_m$year_code == 3]),
                     sen_elapsed_mm = mean(vm_m$sen_elapsed_mm[vm_m$year_code == 3]), 
                     mp_name_code_sep = "NA",
                     year_code = 3)

preds1997 <- predictInterval(fit_reml_1997_pc, p1997, include.resid.var = FALSE,
                             level = 0.95, type = "probability")
preds1997 <- cbind(p1997, preds1997)

preds2001 <- predictInterval(fit_reml_2001_pc, p2001, include.resid.var = FALSE,
                             level = 0.95, type = "probability")
preds2001 <- cbind(p2001, preds2001)

preds2005 <- predictInterval(fit_reml_2005_pc, p2005, include.resid.var = FALSE,
                             level = 0.95, type = "probability")
preds2005 <- cbind(p2005, preds2005)

preds2010 <- predictInterval(fit_reml_2010_pc, p2010, include.resid.var = FALSE,
                             level = 0.95, type = "probability")
preds2010 <- cbind(p2010, preds2010)

preds <- rbind(preds1997, preds2001, preds2005, preds2010)

preds$leg <- "1997-2001"
preds$leg[preds$year_code == 1] <- "2001-2005"
preds$leg[preds$year_code == 2] <- "2005-2010"
preds$leg[preds$year_code == 3] <- "2010-2015"
preds$elect_new <- preds$elect_minmax + preds$year_code

preds$elect_new[preds$elect_minmax == 0 & preds$year_code == 1] <- 1.01
preds$elect_new[preds$elect_minmax == 0 & preds$year_code == 2] <- 2.01
preds$elect_new[preds$elect_minmax == 0 & preds$year_code == 3] <- 3.01


temps <- rbind(preds[preds$elect_minmax == 0 & preds$year_code == 1, ],
               preds[preds$elect_minmax == 0 & preds$year_code == 2, ],
               preds[preds$elect_minmax == 0 & preds$year_code == 3, ])
temps$elect_new <- c(rep(1.005, 2),
                     rep(2.005, 2),
                     rep(3.005, 2))
temps$fit <- NA
temps$upr <- NA
temps$lwr <- NA

preds <- rbind(preds, temps)

pres_year <- ggplot(preds, aes(x = elect_new, y = fit, ymin = lwr, ymax = upr,
                               group = factor(gvt), colour = factor(gvt))) +
  geom_ribbon(alpha = 0.5, fill = "grey70", colour = NA) +
  geom_line(size = 1.25) + 
  scale_y_continuous("Absence\nprobability", limits = c(0, 0.6)) +
  scale_x_continuous("", breaks = c(0, 1, 2, 3),
                     labels = c("1997-2001", "2001-2005", "2005-2010", "2010-2015"),
                     position = "top") +
  theme_minimal() +
  scale_colour_manual("", values = c("grey50", "black"), 
                      labels = c("Opposition", "Government")) +
  theme(text = element_text(family = "serif"),
        panel.background = element_rect(fill = "grey98", colour = NA),
        axis.text.x = element_text(hjust = 0, face = "bold"),
        axis.text.y = element_text(size = 10),
        axis.title.y = element_text(size = 10, angle = 0, hjust = 1),
        axis.title.x = element_text(size = 10),
        plot.title = element_text(size = 12),
        legend.position = c(0.9, 0.95))

pres_year